/*******************************************************************************
* Objective: Create dataset of Fuel exports (% of merchandise exports)
*******************************************************************************/

version 16

* Stablish Working Directory ***************************************************
cd "$workdirectory"

* Settings *********************************************************************
capture log close
clear all
set more off

********************************************************************************
**# Step 1.1: Importing and combining raw datasets
/*******************************************************************************

Fuels (% of Merchandise Exports)
Source: World Bank WDI (Beta)
Notes: We take values for all countries, in all months and years versions of the variable, then take averages.
       We take 0s as missing values
	   Data accessed 12.20.2020

Fuels (% of Merchandise Exports)
Source: World Bank WDI (NO BETA)
Notes: We take 0s as missing values
       Data accessed 12.17.2020

*******************************************************************************/

*-------------------------------------------------------------------------------
* Importing Beta data 

clear all
import excel "raw_datasets/fuel_x_b.xlsx", sheet("Data") firstrow clear
			
* Genereting missing values
quietly{
	foreach var of varlist y1960-y2019{
	replace `var' = "" if `var' == ".." | `var' == "0"
	}
destring y*, replace
}
	
* Matching variable name of country code with the one used in the main dataset
rename CountryCode code_wb

* Taking average over all observations of each year
collapse y*, by(code_wb)

* Generating year 2020 and 1960, dropping 2019, 1961 and 1962
rename y2019 y2020
drop y1960
drop y1961
rename y1962 y1960

* Reshaping into long shape and renaming variables
reshape long y, i(code_wb) j(year)
rename y fuels_x_b

* Keeping only desired countries
merge m:1 code_wb using "processed_datasets/ccode", keepusing(country_wb)
keep if _merge == 3
drop _merge country_wb

* Saving
save "processed_datasets/dataset_fuels(10.04.2021).dta", replace

*-------------------------------------------------------------------------------
* Importing Standard data

clear all
import excel "raw_datasets/fuel_x.xlsx", sheet("Data") firstrow clear
			
* Genereting missing values
quietly{
	foreach var of varlist y1960-y2020{
	replace `var' = "" if `var' == ".." | `var' == "0"
	}
destring y*, replace
}
	
* Matching variable name of country code with the one used in the main dataset
rename CountryCode code_wb

* Generating year 2020, dropping 2019
drop y2020
rename y2019 y2020
drop y1960
drop y1961
rename y1962 y1960

* Reshaping into long shape and renaming variable
reshape long y, i(code_wb) j(year)
rename y fuels_x

* Keeping only desired countries
merge m:1 code_wb using "processed_datasets/ccode", keepusing(country_wb)
keep if _merge == 3
drop _merge country_wb

* Cleaning
keep code_wb year fuels_x

* Merging to main dataset
tempfile fuelori
save `fuelori'

use "processed_datasets/dataset_fuels(10.04.2021).dta", clear
merge 1:1 code_wb year using `fuelori'
drop _merge
save "processed_datasets/dataset_fuels(10.04.2021).dta", replace

********************************************************************************
**# Step 1.2: Importing and combining raw datasets
/*******************************************************************************
Number of oil fields
*******************************************************************************/

clear all
import excel "raw_datasets\sum_petrodata.xlsx", sheet("Sheet1") firstrow clear

* Creating sum of field by country
gen num_fields = 1
collapse (sum) num_fields, by(COUNTRY)

* Renaming countries and adding code_wb
rename COUNTRY country_wup

replace country_wup = "Syrian Arab Republic" if country_wup == "Syria"
replace country_wup = "China, Taiwan Province of China" if country_wup == "Taiwan"
replace country_wup = "Venezuela (Bolivarian Republic of)" if country_wup == "Venezuela"
replace country_wup = "Republic of Korea" if country_wup == "South Korea"
replace country_wup = "Myanmar" if country_wup == "Burma"
replace country_wup = "Viet Nam" if country_wup == "Vietnam"
replace country_wup = "Tanzania" if country_wup == "Tanzania"
replace country_wup = "Côte d'Ivoire" if country_wup == "Cote d'Ivoire"
replace country_wup = "Congo" if country_wup == "Congo (Brazzaville)"
replace country_wup = "Bolivia (Plurinational State of)" if country_wup == "Bolivia"
replace country_wup = "Iran (Islamic Republic of)" if country_wup == "Iran"
replace country_wup = "Democratic Republic of the Congo" if country_wup == "Congo (Kinshasa)"
replace country_wup = "Brunei Darussalam" if country_wup == "Brunei"

merge 1:1 country_wup using "processed_datasets/ccode", keepusing(code_wb)
keep if _merge == 3
drop _merge country_wup

tempfile petrodata
save `petrodata'

* Merging to main dataset

use "processed_datasets/dataset_fuels(10.04.2021).dta", clear
merge m:1 code_wb using `petrodata'
replace num_fields = 0 if num_fields == .
drop _merge

********************************************************************************
**# Step 2.1: Missing Data - Approximating 2020 and 1960
********************************************************************************

* Approximating 2020

sort code_wb year
gen identi = 0
by code_wb: replace identi = 1 if fuels_x != . & fuels_x[58] == . & year == 2018
by code_wb: replace fuels_x = fuels_x[57] if fuels_x == . & year == 2020
by code_wb: replace identi = 1 if fuels_x != . & fuels_x[58] == . & year == 2017
by code_wb: replace fuels_x = fuels_x[56] if fuels_x == . & year == 2020
by code_wb: replace identi = 1 if fuels_x != . & fuels_x[58] == . & year == 2016
by code_wb: replace fuels_x = fuels_x[55] if fuels_x == . & year == 2020
replace fuels_x = . if identi == 1
drop identi

* Approximating 1960

sort code_wb year
gen identi = 0
by code_wb: replace identi = 1 if fuels_x != . & fuels_x[1] == . & year == 1963
by code_wb: replace fuels_x = fuels_x[2] if fuels_x == . & year == 1960
by code_wb: replace identi = 1 if fuels_x != . & fuels_x[1] == . & year == 1964
by code_wb: replace fuels_x = fuels_x[3] if fuels_x == . & year == 1960
replace fuels_x = . if identi == 1
drop identi

********************************************************************************
**# Step 2.2: Missing Data - Apply linear approximation
********************************************************************************

* Linear approximation ---------------------------------------------------------
sort code_wb year
by code_wb: ipolate fuels_x year, gen(intfuels)
gen interpolated_a = 0
replace interpolated_a = 1 if fuels_x == . & intfuels != .
replace fuels_x = intfuels if fuels_x == . & intfuels != .
drop intfuels

********************************************************************************
**# Step 2.3: Missing Data - Using Beta Data
********************************************************************************

* Using Beta Data - We use this when the beta data follows a stable pattern (no extreme outliers) 
*					that, when comparable, matches the non-beta values.

replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "ARE"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "BHR"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "CRI"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "DOM"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "DZA"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "EGY"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "GMB"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "GTM"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "GUY"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "LBR"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "LKA"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "MUS"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "NPL"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "OMN"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "PNG"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "QAT"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "SAU"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "SDN"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "TTO"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "UGA"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "URY"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "ZAF"
replace fuels_x = fuels_x_b if fuels_x == . & code_wb == "ZMB"

* Approximating 2020 and 1960---------------------------------------------------

* Approximating 2020

sort code_wb year
gen identi = 0
by code_wb: replace identi = 1 if fuels_x != . & fuels_x[58] == . & year == 2018
by code_wb: replace fuels_x = fuels_x[57] if fuels_x == . & year == 2020
by code_wb: replace identi = 1 if fuels_x != . & fuels_x[58] == . & year == 2017
by code_wb: replace fuels_x = fuels_x[56] if fuels_x == . & year == 2020
by code_wb: replace identi = 1 if fuels_x != . & fuels_x[58] == . & year == 2016
by code_wb: replace fuels_x = fuels_x[55] if fuels_x == . & year == 2020
replace fuels_x = . if identi == 1
drop identi

* Approximating 1960

sort code_wb year
gen identi = 0
by code_wb: replace identi = 1 if fuels_x != . & fuels_x[1] == . & year == 1963
by code_wb: replace fuels_x = fuels_x[2] if fuels_x == . & year == 1960
by code_wb: replace identi = 1 if fuels_x != . & fuels_x[1] == . & year == 1964
by code_wb: replace fuels_x = fuels_x[3] if fuels_x == . & year == 1960
replace fuels_x = . if identi == 1
drop identi

* Linear approximation ---------------------------------------------------------

sort code_wb year
by code_wb: ipolate fuels_x year, gen(intfuels)
gen interpolated_b = 0
replace interpolated_b = 1 if fuels_x == . & intfuels != .
replace fuels_x = intfuels if fuels_x == . & intfuels != .
drop intfuels

********************************************************************************
**# Step 2.4: Missing Data - Sudan
********************************************************************************

* We take 2010 as 2020
drop if code_wb == "SSD"
replace fuels_x = .				if year >  2010 & code_wb == "SDN"
replace fuels_x = 84.952557		if year == 2020 & code_wb == "SDN"

********************************************************************************
**# Step 2.5: Missing Data - Country-by-country
********************************************************************************

* We rely mainly on USGS

* Afhanistan - USGS Country and Region Reports
* 1963 Data reports no mineral exports in early 60s
replace fuels_x = 0 			if year == 1960 & code_wb == "AFG"

* Burundi - We use 1965 as 1960
replace fuels_x = 0.00555124 	if year == 1960 & code_wb == "BDI"
replace fuels_x = . 			if year == 1965 & code_wb == "BDI"

* Bangladesh - We use 2015 as 2020 - We assume 0 for 1960
replace fuels_x = 0				if year == 1960 & code_wb == "BGD"
replace fuels_x = 0.56129072	if year == 2020 & code_wb == "BGD"
replace fuels_x = .				if year == 2015 & code_wb == "BGD"

* Belize - USGS Country and Region Reports
* 1963 data (used for 1960) and 1970 reports no fuel exports
replace fuels_x = 0				if year == 1960 & code_wb == "BLZ"
replace fuels_x = 0				if year == 1970 & code_wb == "BLZ"

* Botswana - USGS Country and Region Reports
replace fuels_x = 0				if year == 1960 & code_wb == "BWA"
replace fuels_x = 0				if year == 1970 & code_wb == "BWA"
replace fuels_x = 0				if year == 1980 & code_wb == "BWA"
replace fuels_x = 0				if year == 1990 & code_wb == "BWA"

* DRC - USGS Country and Region Reports
replace fuels_x = 20.7			if year == 1985 & code_wb == "COD"
replace fuels_x = 5.5			if year == 1990 & code_wb == "COD"
replace fuels_x = 11			if year == 2010 & code_wb == "COD"
replace fuels_x = 6				if year == 2020 & code_wb == "COD"

* Cape Verde - USGS Country and Region Reports
* 1976 reports claims that no mineral activity in the island was important.
* We leave 1960 as 0 because of this. Similarly, the most recent reports don't
* indicate fuel extraction in the islands
replace fuels_x = 0				if year == 1960 & code_wb == "CPV"
replace fuels_x = 0				if year == 2020 & code_wb == "CPV"

* Cuba - USGS Country and Region Reports
* We take 2006 as 2020
replace fuels_x = 0				if year == 1960 & code_wb == "CUB"
replace fuels_x = 0				if year == 1990 & code_wb == "CUB"
replace fuels_x = .				if year == 2006 & code_wb == "CUB"
replace fuels_x = 0.00027832	if year == 2020 & code_wb == "CUB"

* Djibouti - USGS Country and Region Reports
replace fuels_x = 0				if year == 1960 & code_wb == "DJI"
replace fuels_x = 0				if year == 1980 & code_wb == "DJI"
replace fuels_x = 0				if year == 2020 & code_wb == "DJI"

* Eritrea - USGS Country and Region Reports
* Assumed 0 in 1960 based on no fuel production in 2000s and 2010s
replace fuels_x = 0				if year == 1960 & code_wb == "ERI"
replace fuels_x = 0				if year == 1996 & code_wb == "ERI"
replace fuels_x = 0				if year == 2020 & code_wb == "ERI"

* Ethiopia - USGS Country and Region Reports
* Assumed 0 based on no fuel production in 2000s and 2010s
replace fuels_x = 0				if year == 1960 & code_wb == "ETH"
replace fuels_x = 0				if year == 1990 & code_wb == "ETH"

* Fiji - USGS Country and Region Reports
replace fuels_x = 0				if year == 1960 & code_wb == "FJI"

* Gabon - USGS Country and Region Reports
replace fuels_x = 76			if year == 2020 & code_wb == "GAB"

* Guinea - USGS Country and Region Reports
* There doens't seem to be reports of fuel exports in the 60s or 80s
replace fuels_x = 0				if year == 1960 & code_wb == "GIN"

* Guinea-Bissau - USGS Country and Region Reports
replace fuels_x = 0				if year == 1960 & code_wb == "GNB"
replace fuels_x = 0				if year == 2010 & code_wb == "GNB"
replace fuels_x = 0				if year == 2020 & code_wb == "GNB"

* Haiti - USGS Country and Region Reports
replace fuels_x = 0				if year == 1960 & code_wb == "HTI"
replace fuels_x = 0				if year == 1970 & code_wb == "HTI"
replace fuels_x = 0				if year == 1980 & code_wb == "HTI"
replace fuels_x = 0				if year == 2000 & code_wb == "HTI"
replace fuels_x = 0				if year == 2010 & code_wb == "HTI"
replace fuels_x = 0				if year == 2020 & code_wb == "HTI"

* Kuwait - USGS Country and Region Reports
replace fuels_x = 100			if year == 1960 & code_wb == "KWT"

* Lebanon - Use 1967 as 1960
replace fuels_x = 0.16068622	if year == 1960 & code_wb == "LBN"
replace fuels_x = .				if year == 1967 & code_wb == "LBN"

* Liberia - USGS Country and Region Reports
replace fuels_x = 0				if year == 2020 & code_wb == "LBR"

* Lesotho - USGS Country and Region Reports
replace fuels_x = 0				if year == 1960 & code_wb == "LSO"

* Macau - Take 1974 as 1960
replace fuels_x = 0.00019865	if year == 1960 & code_wb == "MAC"
replace fuels_x = .				if year == 1974 & code_wb == "MAC"
replace fuels_x = 0				if year == 2020 & code_wb == "MAC"

* Mongolia - USGS Country and Region Reports
replace fuels_x = 0				if year == 1960 & code_wb == "MNG"

* Malawi - USGS Country and Region Reports
* We use 1968 as 1960
replace fuels_x = 0.01225213	if year == 1960 & code_wb == "MWI"
replace fuels_x = .				if year == 1968 & code_wb == "MWI"

* Namibia - USGS Country and Region Reports
replace fuels_x = 0				if year == 1960 & code_wb == "NAM"
replace fuels_x = 0				if year == 1990 & code_wb == "NAM"

* Nicaragua - USGS Country and Region Reports
* We use 1965 as 1960
replace fuels_x = 0.0084279		if year == 1960 & code_wb == "NIC"
replace fuels_x = .				if year == 1965 & code_wb == "NIC"

* Nepal - USGS Country and Region Reports
* We use 1968 as 1960
replace fuels_x = 0.00018601	if year == 1960 & code_wb == "NPL"
replace fuels_x = .				if year == 1968 & code_wb == "NPL"

* Oman - USGS Country and Region Reports
* We use 1968 as 1960
replace fuels_x = 99.91673		if year == 1960 & code_wb == "OMN"
replace fuels_x = .				if year == 1968 & code_wb == "OMN"

* PNG - USGS Country and Region Reports
* We use 2014 as 2020
replace fuels_x = 9.5			if year == 2020 & code_wb == "PNG"

* Rwanda - USGS Country and Region Reports
replace fuels_x = 0				if year == 1960 & code_wb == "RWA"

* Solomon Islands - USGS Country and Region Reports
replace fuels_x = 0				if year == 1960 & code_wb == "SLB"

* Sierra Leone - USGS Country and Region Reports
replace fuels_x = 0				if year == 1960 & code_wb == "SLE"

* Somalia - USGS Country and Region Reports
replace fuels_x = 0 			if year == 2020	& code_wb == "SOM"

* Suriname - USGS Country and Region Reports
replace fuels_x = 0 			if year == 1960	& code_wb == "SUR"

* Swaziland - USGS Country and Region Reports
replace fuels_x = 0 			if year == 1960	& code_wb == "SWZ"

* Syria - USGS Country and Region Reports
* Based on reports from EIA (https://www.eia.gov/international/analysis/country/SYR)
* it seems like Syrian oil fell dramatically after the conflict started (around 90%).
* Based on this, we assign it a value of 0 in 2020.

replace fuels_x = 0 			if year == 1960	& code_wb == "SYR"
replace fuels_x = 0 			if year == 1965	& code_wb == "SYR"
replace fuels_x = 0 			if year == 2020	& code_wb == "SYR"

* Chad - USGS Country and Region Reports
* For 2000s, Source Trade Map (ITC) - Mirror Data * Not available in USGS
replace fuels_x = 0 			if year == 1980	& code_wb == "TCD"
replace fuels_x = 0 			if year == 2000	& code_wb == "TCD"
replace fuels_x = 88.37			if year == 2010	& code_wb == "TCD"
replace fuels_x = 92.9 			if year == 2020	& code_wb == "TCD"

* Trinidad and Tobago - USGS Country and Region Reports (2015 used for 2020). For 1960 we use WB 1965 
replace fuels_x = 83.108416		if year == 1960	& code_wb == "TTO"
replace fuels_x = . 			if year == 1965	& code_wb == "TTO"
replace fuels_x = 78 			if year == 2020	& code_wb == "TTO"

* Taiwan - From minfuel data
replace fuels_x = 0				if 				  code_wb == "TWN"

* Tanzania - From minfuel data
replace fuels_x = 0				if year == 1960	& code_wb == "TZA"

* Venezuela - Source: WTO Data (https://data.wto.org/) * ESTIMATES
* Notes: We use 2019 as 2020
replace fuels_x = 93.9			if year == 2020 & code_wb == "VEN"

* Zambia - For 1960 we use WB 1960 
replace fuels_x = 0.05096353	if year == 1960	& code_wb == "ZMB"
replace fuels_x = . 			if year == 1965	& code_wb == "ZMB"

* Zimbabwe - USGS Country and Region Reports
replace fuels_x = 0				if year == 1960	& code_wb == "ZWE"

* Linear approximation ---------------------------------------------------------

sort code_wb year
by code_wb: ipolate fuels_x year, gen(intfuels)
gen interpolated_c = 0
replace interpolated_c = 1 if fuels_x == . & intfuels != .
replace fuels_x = intfuels if fuels_x == . & intfuels != .
drop intfuels

*-------------------------------------------------------------------------------
* Fixing potential errors in the data
*-------------------------------------------------------------------------------

* United Arab Emirates - WB data presents a sharp decline in fuel exports in the 1980s and 1990s
replace fuels_x = .				if year > 1979 & year < 1999 & code_wb == "ARE"
replace fuels_x = 85			if year == 1985 & code_wb == "ARE"
replace fuels_x = 75			if year == 1990 & code_wb == "ARE"
replace fuels_x = 80			if year == 1992 & code_wb == "ARE"

* Bahrain- WB data presents a sharp decline in fuel exports in the 1980s and 1990s
* For 1995, we know that 90 % of exports are mineral and aluminium is 25%. Assume difference is fuel
replace fuels_x = .				if year > 1978 & year < 2001 & code_wb == "BHR"
replace fuels_x = 75			if year == 1987 & code_wb == "BHR"
replace fuels_x = 80			if year == 1990 & code_wb == "BHR"
replace fuels_x = 65			if year == 1995 & code_wb == "BHR"

* Bahamas - Bahamas seems export/have exported lots of oil-related products from refinery, but not 
* from own oil fields. The number of fields from Petrodata is 0. USGS reports
* seem to indicate that all oil related activity is related to refinery without extraction
replace fuels_x = 0				if 				  code_wb == "BHS"

* Bhutan - Accordint to the USGS 2000 report of the country 
* "Bhutan has no significant reserves of coal, natural gas, or petroleum."
* Most of the high values of export of fuel present in the WB data seem to
* be coming from the hydroelectric industry.
* WARNING: we could be missing coal exports for late 2000s by replacing all fuels_x = 0
replace fuels_x = 0				if 				  code_wb == "BTN"

* Djibouti - USGS Country and Region Reports
* Djibouti doesn't seem to produce fuels, the data from WB could be geothermal fluids
replace fuels_x = 0				if 				  code_wb == "DJI"

* Dominican Republic - USGS Country and Region Reports
* WB data seems to be contaminated by the production of refinery, but DOM doesn't
* seem to extract oil
replace fuels_x = 0				if year >  1997 & code_wb == "DOM"

* Equatorial Guinea - USGS Country and Region Reports
* Oil becomes relevant post 1996
replace fuels_x = .				if 				  code_wb == "GNQ"
replace fuels_x = 0				if year == 1960 & code_wb == "GNQ"
replace fuels_x = 0				if year == 1990 & code_wb == "GNQ"
replace fuels_x = 0				if year == 1995 & code_wb == "GNQ"
replace fuels_x = 99			if year == 2005 & code_wb == "GNQ"
replace fuels_x = 88			if year == 2020 & code_wb == "GNQ"

* India - USGS Country and Region Reports
replace fuels_x = .				if year >  1979	& code_wb == "IND"
replace fuels_x = 0.43129883	if year == 2020	& code_wb == "IND"

* Iraq - USGS Country and Region Reports
* We use 1965 for 1960, 2004 for 2000, 2009 for 2010 and 2015 for 2020
replace fuels_x = .				if 				  code_wb == "IRQ"
replace fuels_x = 95			if year == 1960 & code_wb == "IRQ"
replace fuels_x = 99			if year == 1980 & code_wb == "IRQ"
replace fuels_x = 90			if year == 1990 & code_wb == "IRQ"
replace fuels_x = 98			if year == 2000 & code_wb == "IRQ"
replace fuels_x = 98			if year == 2010 & code_wb == "IRQ"
replace fuels_x = 100			if year == 2020 & code_wb == "IRQ"

* Jamaica - USGS Country and Region Reports
* World Bank values reflect refinery products exports, but Jamaica doesn't seem to
* have oil reserves.
replace fuels_x = 0				if 				  code_wb == "JAM"

* Kenya - USGS Country and Region Reports
* World Bank values reflect refinery products exports, but Kenya doesn't seem to
* have oil reserves (in 2012 reserves were found, so 2020 export values could be larger than 0)
replace fuels_x = 0				if 				  code_wb == "KEN"

* Republic of Korea - USGS Country and Region Reports
replace fuels_x = 0				if year > 1969	& code_wb == "KOR"

* Kuwait - USGS Country and Region Reports
* WB reports very low values for the 80s, we leave those values as missing for later interpolation
replace fuels_x = .				if year > 1980 & year < 1990 & code_wb == "KWT"

* Laos - USGS Country and Region Reports
replace fuels_x = 0				if 				  code_wb == "LAO"

* Sri Lanka - USGS Country and Region Reports
replace fuels_x = 0				if 				  code_wb == "LKA"

* Mexico - USGS Country and Region Reports
* We use 2016 as 2020
replace fuels_x = .				if year > 1982	& code_wb == "MEX"
replace fuels_x = 69			if year == 1984	& code_wb == "MEX"
replace fuels_x = 34			if year == 1990	& code_wb == "MEX"
replace fuels_x = 8 			if year == 2001	& code_wb == "MEX"
replace fuels_x = 15 			if year == 2007	& code_wb == "MEX"
replace fuels_x = 4 			if year == 2020	& code_wb == "MEX"

* Mozambique - USGS Country and Region Reports
* Mozambique WB data seems to include import-export of petroleoum products
* We use 1965 as 1960 and 1985

replace fuels_x = 0.2 			if year == 1960	& code_wb == "MOZ"
replace fuels_x = 0.2 			if year == 1985	& code_wb == "MOZ"
replace fuels_x = 0 			if year == 1999	& code_wb == "MOZ"
replace fuels_x = 19 			if year == 2020	& code_wb == "MOZ"

* Mauritania - USGS Country and Region Reports
* We use 2016 as 2020 
replace fuels_x = .				if 				  code_wb == "MRT"
replace fuels_x = 0 			if year == 1960	& code_wb == "MRT"
replace fuels_x = 0 			if year == 1970	& code_wb == "MRT"
replace fuels_x = 0 			if year == 1980	& code_wb == "MRT"
replace fuels_x = 0 			if year == 1994	& code_wb == "MRT"
replace fuels_x = 0 			if year == 2000	& code_wb == "MRT"
replace fuels_x = 14 			if year == 2010	& code_wb == "MRT"
replace fuels_x = 6.2 			if year == 2020	& code_wb == "MRT"

* Panama - USGS Country and Region Reports
replace fuels_x = .				if 				  code_wb == "PAN"
replace fuels_x = 0 			if year == 1960	& code_wb == "PAN"
replace fuels_x = 0 			if year == 2020	& code_wb == "PAN"

* Paraguay - USGS Country and Region Reports
replace fuels_x = .				if 				  code_wb == "PRY"
replace fuels_x = 0 			if year == 1960	& code_wb == "PRY"
replace fuels_x = 0 			if year == 2020	& code_wb == "PRY"

* Senegal - USGS Country and Region Reports
replace fuels_x = 0 			if 				  code_wb == "SEN"

* We modify Singapore fuel exports to 0, as the country does not have oil reserves, all it's exports are the result of importing and refining.
replace fuels_x = 0 			if 				  code_wb == "SGP"

* El Salvador - USGS Country and Region Reports
replace fuels_x = .				if 				  code_wb == "SLV"
replace fuels_x = 0 			if year == 1960	& code_wb == "SLV"
replace fuels_x = 0 			if year == 2020	& code_wb == "SLV"

* Somalia - USGS Country and Region Reports
replace fuels_x = 0 			if year == 1980	& code_wb == "SOM"

* Suriname - USGS Country and Region Reports
* We use 2015 as 2020
replace fuels_x = . 			if year >  2014	& code_wb == "SUR"
replace fuels_x = 9.4 			if year == 2020	& code_wb == "SUR"

* Linear approximation ---------------------------------------------------------

sort code_wb year
by code_wb: ipolate fuels_x year, gen(intfuels)
gen interpolated_d = 0
replace interpolated_d = 1 if fuels_x == . & intfuels != .
replace fuels_x = intfuels if fuels_x == . & intfuels != .
drop intfuels

********************************************************************************
**# Step 3.1: Finalize Data - Formating
********************************************************************************

* Fix higher than 100 values ---------------------------------------------------

replace fuels_x = 100 if fuels_x > 100 & fuels_x != .

* Temporal fixes for Missings --------------------------------------------------

gen flag = 1 if fuels_x == .
gen year2 = -year
sort code_wb year2
by code_wb: replace fuels_x = fuels_x[_n-1] if fuels_x == .
sort code_wb year
drop year2

********************************************************************************
**# Step 3.1: Finalize Data - Calculate Moving Averages
********************************************************************************

* Generating moving averages *

* Sorting
sort code_wb year

* Moving Average (-1/+1)

* Generating Variable ma1
gen fuels_x_ma1 = 0

* Estimating ma1 for the sample except 1960 and 2020
foreach ma1 of numlist 2/57{
	by code_wb: replace fuels_x_ma1 = (fuels_x[`ma1'-1] + fuels_x[`ma1'] + fuels_x[`ma1'+1])/3 if _n == `ma1' 
}

* Estimating ma1 for 1960 and 2020
by code_wb: replace fuels_x_ma1 = (fuels_x[1] + fuels_x[2])/2 if _n == 1
by code_wb: replace fuels_x_ma1 = (fuels_x[58] + fuels_x[57])/2 if _n == 58


* Taking moving averages with a window of 2

* Generating Variable ma2
gen fuels_x_ma2 = 0

foreach ma2 of numlist 3/56{
	by code_wb: replace fuels_x_ma2 = ///
	(fuels_x[`ma2'-2] + fuels_x[`ma2'-1] + fuels_x[`ma2'] + fuels_x[`ma2'+1] + fuels_x[`ma2'+2])/5 ///
	if _n == `ma2' 
}
	
* Estimating ma2 for 1960, 1961, 2018 and 2020
by code_wb: replace fuels_x_ma2 = (fuels_x[1] + fuels_x[2] + fuels_x[3])/3 if _n == 1
by code_wb: replace fuels_x_ma2 = (fuels_x[1] + fuels_x[2] + fuels_x[3] + fuels_x[4])/4 if _n == 2
by code_wb: replace fuels_x_ma2 = (fuels_x[55] + fuels_x[56] + fuels_x[57] + fuels_x[58])/4 if _n == 57
by code_wb: replace fuels_x_ma2 = (fuels_x[58] + fuels_x[57] + fuels_x[56])/3 if _n == 58

********************************************************************************
**# Step 3.3: Final edits and labeling
********************************************************************************

* Final Fixes
foreach v in fuels_x fuels_x_ma1 fuels_x_ma2{
	replace `v' = . if flag == 1
}

keep code_wb year fuels_x fuels_x_ma1 fuels_x_ma2 num_fields

rename code_wb ccode
la var ccode 		"Country code (from World Bank)"
la var year 		"Year t"
la var fuels_x 		"Exports of fuels (% of Merch X)"
la var fuels_x_ma1	"Exports of fuels (% of Merch X) (MA1)"
la var fuels_x_ma2	"Exports of fuels (% of Merch X) (MA2)"
la var num_fields	"Number of oil fields in country (Historical)"

save "processed_datasets/dataset_fuels(10.04.2021).dta", replace
